{"componentChunkName":"component---src-templates-bootcamp-js","path":"/bootcamp/bigdata/6-spark/","result":{"data":{"site":{"siteMetadata":{"title":"William Blogs & More","description":"team knowledge base"}},"allBootcamp":{"edges":[{"node":{"fields":{"id":"410a853e-19db-5ecf-9193-d90fd4e2b164","slug":"/README/","title":"README"}}},{"node":{"fields":{"id":"ebd8a7a9-4f93-5f7f-89b4-7a921fc850fd","slug":"/backend/DEV/","title":"DEV"}}},{"node":{"fields":{"id":"c97bf13a-dd3f-5e0b-9f0a-9c5f08a42d57","slug":"/backend/Restful/","title":"Restful"}}},{"node":{"fields":{"id":"81e9e69e-bf6a-51e5-843d-7b4321c330f9","slug":"/backend/curl/","title":"Curl"}}},{"node":{"fields":{"id":"2aa2ab91-e4c3-59d3-ae10-2949c78bbba2","slug":"/backend/express/","title":"Express"}}},{"node":{"fields":{"id":"923aee4e-1ea6-5b5c-9dbf-7f3ed2ccd8de","slug":"/backend/flow/","title":"Flow"}}},{"node":{"fields":{"id":"735e010f-fe92-58c2-a717-44e1379ec81a","slug":"/backend/microservice/","title":"Microservice"}}},{"node":{"fields":{"id":"5257ef00-003a-505c-9681-90b156b2fdc3","slug":"/backend/mongo/","title":"Mongo"}}},{"node":{"fields":{"id":"5d7e4d14-bbe0-51e7-ac00-aed822be865d","slug":"/backend/nodejs/","title":"Nodejs"}}},{"node":{"fields":{"id":"04e0f6cd-ba6d-5bcb-a711-f978053c85a9","slug":"/backend/pg/","title":"Pg"}}},{"node":{"fields":{"id":"07933914-1441-5d9a-93b3-7b8527f32834","slug":"/backend/rabbitmq/","title":"Rabbitmq"}}},{"node":{"fields":{"id":"aeee00c3-e65a-5e57-8e26-99bdee4b5972","slug":"/backend/redis/","title":"Redis"}}},{"node":{"fields":{"id":"5efa4932-4a43-5627-a52e-d6c44ea1ce37","slug":"/backend/socket/","title":"Socket"}}},{"node":{"fields":{"id":"2546451c-541b-5d1f-b34c-1f1842685cfc","slug":"/bestitconsulting/Bootcamp/1-bootcamp/","title":"1 Bootcamp"}}},{"node":{"fields":{"id":"3ce1ed0e-ce4c-53b6-89a3-dd5dc7642647","slug":"/bestitconsulting/Bootcamp/2-release_branch/","title":"2 
Release_branch"}}},{"node":{"fields":{"id":"e3f26574-c4e6-5058-8336-6e761d369455","slug":"/bestitconsulting/Bootcamp/3-coding/","title":"3 Coding"}}},{"node":{"fields":{"id":"8b752412-b3d3-5bbe-87b3-d99ef91d2759","slug":"/bestitconsulting/Bootcamp/4-testing/","title":"4 Testing"}}},{"node":{"fields":{"id":"e950ea75-d6c1-5fca-8e1c-4b4fce362fab","slug":"/bestitconsulting/Bootcamp/5-CICD/","title":"5 CICD"}}},{"node":{"fields":{"id":"739b0238-56b4-5a47-bc76-3286eadcb425","slug":"/bestitconsulting/Bootcamp/6-BA/","title":"6 BA"}}},{"node":{"fields":{"id":"bc2e6ad3-6b39-54b0-8d66-e5e295f331ee","slug":"/bestitconsulting/Bootcamp/QA/","title":"QA"}}},{"node":{"fields":{"id":"484882b7-848a-591f-80d5-24d045cb9582","slug":"/bestitconsulting/Bootcamp/README/","title":"README"}}},{"node":{"fields":{"id":"27252ae9-1261-5b83-9039-45c41697c09d","slug":"/bestitconsulting/Bootcamp/Reference/","title":"Reference"}}},{"node":{"fields":{"id":"2a271bc5-0775-5cb0-99ab-056d5c0cf09f","slug":"/bestitconsulting/courses/agile/","title":"Agile"}}},{"node":{"fields":{"id":"372866f9-ffd2-5576-bbc1-415732492686","slug":"/bestitconsulting/courses/grow-admin/","title":"Grow Admin"}}},{"node":{"fields":{"id":"5eb7bdc4-c171-52bc-bf48-f413de03ea24","slug":"/bestitconsulting/courses/learn-suite/","title":"Learn Suite"}}},{"node":{"fields":{"id":"986b6381-1b35-5253-93d5-1294d4984202","slug":"/bestitconsulting/courses/security/","title":"Security"}}},{"node":{"fields":{"id":"8e3b3436-48d9-57f8-9d53-71494461a76e","slug":"/bestitconsulting/radar/github-radar-1/","title":"Github Radar 1"}}},{"node":{"fields":{"id":"f9b1b128-a118-5569-b2ee-b6d5c347e964","slug":"/bestitconsulting/radar/github-radar/","title":"Github Radar"}}},{"node":{"fields":{"id":"17a02ec4-b391-54f1-8c60-922492f1a5eb","slug":"/bigdata/1-bigdata/","title":"1 Bigdata"}}},{"node":{"fields":{"id":"36884170-cd04-548a-9a00-77e36a4b1380","slug":"/bigdata/2-hadoop/","title":"2 
Hadoop"}}},{"node":{"fields":{"id":"c9cdff49-84d1-58e6-bf25-926b7f68d84c","slug":"/bigdata/201-kafka-4/","title":"201 Kafka 4"}}},{"node":{"fields":{"id":"f2387a56-a470-54c8-94aa-182fffbab8b0","slug":"/bigdata/201-nosql-8/","title":"201 Nosql 8"}}},{"node":{"fields":{"id":"4d3d0dd0-c311-51f6-9975-80fd4c5fa752","slug":"/bigdata/201-streaming-5/","title":"201 Streaming 5"}}},{"node":{"fields":{"id":"27a22e9e-6aa2-5c4e-9edf-c06b8740246d","slug":"/bigdata/3-hdfs/","title":"3 Hdfs"}}},{"node":{"fields":{"id":"f80ad3db-5700-5e3b-a5ae-2e6f5dc0c886","slug":"/bigdata/4-devops/","title":"4 Devops"}}},{"node":{"fields":{"id":"07672f34-d70e-589c-ba8e-67e284bc1d8c","slug":"/bigdata/5-hive/","title":"5 Hive"}}},{"node":{"fields":{"id":"bf3a9766-31fd-5213-8dd9-7c38ebe7bb80","slug":"/bigdata/6-spark/","title":"6 Spark"}}},{"node":{"fields":{"id":"4dab467f-b6ea-531b-8c28-9bcef71863e4","slug":"/bigdata/7-docker/","title":"7 Docker"}}},{"node":{"fields":{"id":"7ee4ff75-d3f8-5c1a-b701-facf126f3450","slug":"/bigdata/8-streaming/","title":"8 Streaming"}}},{"node":{"fields":{"id":"6785ce66-99e5-5e00-af30-b1c32c61cb12","slug":"/bigdata/9-elasticsearch/","title":"9 Elasticsearch"}}},{"node":{"fields":{"id":"9a59f47c-4cd3-51db-9962-2ffb26b14dc4","slug":"/bigdata/hdfs-kubernetes/","title":"Hdfs Kubernetes"}}},{"node":{"fields":{"id":"819a354c-d042-54db-9466-5063d21db45f","slug":"/bigdata/kubernetes/","title":"Kubernetes"}}},{"node":{"fields":{"id":"4535d21c-0f79-5cda-b235-98658a1ffe1f","slug":"/bigdata/spark-kubernetes/","title":"Spark 
Kubernetes"}}},{"node":{"fields":{"id":"07d1d0ca-dc16-52b5-b911-0d128a2388eb","slug":"/bigdata2/README/","title":"README"}}},{"node":{"fields":{"id":"605faba4-a685-5a80-a80a-5130700509e3","slug":"/bigdata2/TODO/","title":"TODO"}}},{"node":{"fields":{"id":"5b1a9498-e94f-5dba-a4fa-9aa2f3da4312","slug":"/bigdata2/editor/","title":"Editor"}}},{"node":{"fields":{"id":"6e1973a3-4045-57b9-a096-d498f5d2802b","slug":"/bigdata2/hadoop/","title":"Hadoop"}}},{"node":{"fields":{"id":"e63f21ad-65e0-56db-891e-60a89c5f93da","slug":"/bigdata2/kafka/","title":"Kafka"}}},{"node":{"fields":{"id":"2efc1fee-93bd-53eb-b5e0-672ff2acc9a4","slug":"/bigdata2/scala/","title":"Scala"}}},{"node":{"fields":{"id":"001e1d27-4cba-5926-ba33-8197e2065136","slug":"/bigdata2/spark/","title":"Spark"}}},{"node":{"fields":{"id":"2e2078b0-b734-5ae3-873c-022349c1c3cf","slug":"/bigdata2/vscode/","title":"Vscode"}}},{"node":{"fields":{"id":"b52de8e7-4791-556a-9989-b84717cd3370","slug":"/bigdata2/web-resource/","title":"Web Resource"}}},{"node":{"fields":{"id":"e0378d10-6064-560e-a6b2-cbe89a820d26","slug":"/bigdata2/zookeeper/","title":"Zookeeper"}}},{"node":{"fields":{"id":"616110f5-9073-5438-846b-ff7584d4e922","slug":"/cloud/aws/","title":"Aws"}}},{"node":{"fields":{"id":"c788368a-0f0e-5661-8fe9-c469d5a278d8","slug":"/cloud/azure/","title":"Azure"}}},{"node":{"fields":{"id":"30bec85f-fc28-5638-b42d-a952b414de7c","slug":"/cloud/elk/","title":"Elk"}}},{"node":{"fields":{"id":"57773e69-6a69-5e86-a641-af37ffe1ad40","slug":"/cloud/gcp/","title":"Gcp"}}},{"node":{"fields":{"id":"6688fe10-a20c-5479-bc80-170063d3e58c","slug":"/frontend/apollo/","title":"Apollo"}}},{"node":{"fields":{"id":"09e2c05f-9d10-5776-8fe8-44e9a565a20e","slug":"/frontend/graphql/","title":"Graphql"}}},{"node":{"fields":{"id":"28d38d20-c6e2-50ae-ac83-d3e3b9f25248","slug":"/frontend/proxy/","title":"Proxy"}}},{"node":{"fields":{"id":"ca4444ea-9796-5d40-98e1-c01d80f88221","slug":"/frontend/react-router/","title":"React 
Router"}}},{"node":{"fields":{"id":"61e06420-e9e4-5aa4-9d71-bf618be3f4fe","slug":"/frontend/react/","title":"React"}}},{"node":{"fields":{"id":"9d95393b-96be-5065-83bb-536b746eed21","slug":"/frontend/redux/","title":"Redux"}}},{"node":{"fields":{"id":"e3bb0f44-4819-556c-8511-7402303e36c5","slug":"/misc/0428/","title":"0428"}}},{"node":{"fields":{"id":"dfcc8a86-17af-5c8c-b831-9fe4fa51bf0e","slug":"/misc/HISTORY/","title":"HISTORY"}}},{"node":{"fields":{"id":"6f004260-a55d-5a2d-8146-d197509fd980","slug":"/misc/TODO/","title":"TODO"}}},{"node":{"fields":{"id":"30096e9c-2593-5fc8-80d5-1f7648898888","slug":"/misc/git/","title":"Git"}}},{"node":{"fields":{"id":"bed21539-b0d7-5936-9c15-96905db68ac9","slug":"/misc/gitlab/","title":"Gitlab"}}},{"node":{"fields":{"id":"ab43544c-0ef1-56d2-a0a3-035fdd1cf7fe","slug":"/misc/installation/","title":"Installation"}}},{"node":{"fields":{"id":"b2e02e77-9623-5967-a541-fe1829315b47","slug":"/misc/misc/","title":"Misc"}}},{"node":{"fields":{"id":"4c7d96dc-fddc-52fd-9333-9fa6d2633874","slug":"/misc/vocabulary/","title":"Vocabulary"}}},{"node":{"fields":{"id":"f2a8cc1a-8b27-541c-b68e-8c5dc6d0f6b3","slug":"/poc/courses/agile/","title":"Agile"}}},{"node":{"fields":{"id":"ae6e68d8-0f75-5fac-ad96-f042bddd50aa","slug":"/poc/courses/grow-admin/","title":"Grow Admin"}}},{"node":{"fields":{"id":"689b1b72-e7b6-5b07-9890-50f653fdd7df","slug":"/poc/courses/learn-suite/","title":"Learn Suite"}}},{"node":{"fields":{"id":"003c5b81-20aa-57aa-b268-17aaaa338e68","slug":"/poc/courses/security/","title":"Security"}}},{"node":{"fields":{"id":"e0f53ac9-fbe0-5c72-8b69-ee2cce387f90","slug":"/poc/radar/github-radar-1/","title":"Github Radar 1"}}},{"node":{"fields":{"id":"c5718e38-5cc1-566e-a649-a4d501877995","slug":"/poc/radar/github-radar/","title":"Github Radar"}}},{"node":{"fields":{"id":"5a68cfb3-1b53-5527-a6ad-b8277381fab4","slug":"/poc/radar/steps/","title":"Steps"}}}]},"bootcamp":{"fields":{"id":"bf3a9766-31fd-5213-8dd9-7c38ebe7bb80","title":"6 
Spark","slug":"/bigdata/6-spark/","tag":"bigdata","category":"bootcamp","date":"2020-12-17","size":6392},"html":"<h1 id=\"spark\" style=\"position:relative;\"><a href=\"#spark\" aria-label=\"spark permalink\" class=\"anchor before\"><svg aria-hidden=\"true\" focusable=\"false\" height=\"16\" version=\"1.1\" viewBox=\"0 0 16 16\" width=\"16\"><path fill-rule=\"evenodd\" d=\"M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z\"></path></svg></a>Spark</h1>\n<p>Apache Spark is an open-source cluster-computing framework designed for speed and ease of use.  </p>\n<ul>\n<li>Everything in Memory</li>\n<li>Up to 100x faster than MapReduce</li>\n<li>Runs on Hadoop, Mesos, standalone, or in the cloud</li>\n<li>Support for many programming languages</li>\n</ul>\n<h2 id=\"1-spark-vs-hadoop-mapreduce\" style=\"position:relative;\"><a href=\"#1-spark-vs-hadoop-mapreduce\" aria-label=\"1 spark vs hadoop mapreduce permalink\" class=\"anchor before\"><svg aria-hidden=\"true\" focusable=\"false\" height=\"16\" version=\"1.1\" viewBox=\"0 0 16 16\" width=\"16\"><path fill-rule=\"evenodd\" d=\"M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z\"></path></svg></a>1. Spark vs. 
Hadoop MapReduce</h2>\n<table>\n<thead>\n<tr>\n<th></th>\n<th>Hadoop MapReduce</th>\n<th>Apache Spark</th>\n</tr>\n</thead>\n<tbody>\n<tr>\n<td><strong>Language Support</strong></td>\n<td>Java,C/C++,Ruby,Python,…</td>\n<td>Scala,Java,Python,R,SQL</td>\n</tr>\n<tr>\n<td><strong>Developed</strong></td>\n<td>Java</td>\n<td>Scala</td>\n</tr>\n<tr>\n<td><strong>Latency</strong></td>\n<td>disk oriented</td>\n<td>memory oriented</td>\n</tr>\n<tr>\n<td><strong>Category</strong></td>\n<td>data processing engine</td>\n<td>data analytics engine</td>\n</tr>\n<tr>\n<td><strong>data processing</strong></td>\n<td>batch</td>\n<td>batch,streaming</td>\n</tr>\n<tr>\n<td><strong>fault tolerance</strong></td>\n<td>replication</td>\n<td>RDD</td>\n</tr>\n</tbody>\n</table>\n<p><span\n      class=\"gatsby-resp-image-wrapper\"\n      style=\"position: relative; display: block; margin-left: auto; margin-right: auto; max-width: 600px; \"\n    >\n      <a\n    class=\"gatsby-resp-image-link\"\n    href=\"/static/d006811533b305a035e9fad411de0a27/bb27a/spark2.png\"\n    style=\"display: block\"\n    target=\"_blank\"\n    rel=\"noopener\"\n  >\n    <span\n    class=\"gatsby-resp-image-background-image\"\n    style=\"padding-bottom: 58.666666666666664%; position: relative; bottom: 0; left: 0; background-image: 
url('data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABQAAAAMCAYAAABiDJ37AAAACXBIWXMAAA7DAAAOwwHHb6hkAAACCElEQVQoz11TyW7UQBDt//+FSTgEcYHAEaQcQAkEOI3EIbN4GybjfV8yGdtt+1HVHgcrLZXLVdX1ulYRhiHyLIfn+QiCECGRYztwXQ9RGEE2EuiBoRsU9WcaXtGkF+j4NsiJPvzPnIl1cpIxEuumM8x0M714o/m4XNtYPBxwsbLV/8V65B/MANdEn3YhPpo+rjYOFnRnsTrgnebh2grw3vBwtR31l0Ti7e8/+PYY4VdY4YeX485JX+hGf8SNtsfnlYG7fYCfXoZbO8F3N8PXnYcvawv3Tox7km+dRPkIa7NGTnWMHAdPaYL5SQIPz0UOtO1Yjtlpqgo10esjNlsNu797PKzWcKkx7NfKDnXTIisKNK1ER8VmXduOJEnOixKnuoGkOjftf5vQNB2GaSKKEwzcF7qgAMiY5wV67gv3jftEvKMXmbOtqo4qKmU/3xMrisy0dgRqIWbQfgSVFBHLBUXCznEcI00zNDRGDM66kMaKeZqkyGj0yvIJQtcNaLquHOu6VbPEETIoA0ZRDN8P1JzyP6fIj5Zl9WJjYObH4wliq2nqFRX6GYjT4gjZwXVdFRnLLUXH9p5mkwF5EXzfBy9HQzVnmzAtC4eDTSFnqgZyDkhpqoskT5FLBTimzFkNyqc72wlwuVzCMEz1ej9bIX6N07RpDU/P9Vh0tslxs7hm9nlFOYPJ7x8Xz4+aTaS4jgAAAABJRU5ErkJggg=='); background-size: cover; display: block;\"\n  ></span>\n  <img\n        class=\"gatsby-resp-image-image\"\n        alt=\"Spark vs Hadoop MapReduce\"\n        title=\"Spark vs Hadoop MapReduce\"\n        src=\"/static/d006811533b305a035e9fad411de0a27/0a47e/spark2.png\"\n        srcset=\"/static/d006811533b305a035e9fad411de0a27/8a4e8/spark2.png 150w,\n/static/d006811533b305a035e9fad411de0a27/5a46d/spark2.png 300w,\n/static/d006811533b305a035e9fad411de0a27/0a47e/spark2.png 600w,\n/static/d006811533b305a035e9fad411de0a27/1cfc2/spark2.png 900w,\n/static/d006811533b305a035e9fad411de0a27/c1b63/spark2.png 1200w,\n/static/d006811533b305a035e9fad411de0a27/bb27a/spark2.png 1371w\"\n        sizes=\"(max-width: 600px) 100vw, 600px\"\n        style=\"width:100%;height:100%;margin:0;vertical-align:middle;position:absolute;top:0;left:0;\"\n        loading=\"lazy\"\n      />\n  </a>\n    </span>\n<span\n      class=\"gatsby-resp-image-wrapper\"\n      style=\"position: relative; display: block; margin-left: auto; margin-right: auto; max-width: 600px; \"\n    >\n      <a\n    class=\"gatsby-resp-image-link\"\n    
href=\"/static/37ec11f6ba8affac2a325c88ef6262f4/062c8/spark3.png\"\n    style=\"display: block\"\n    target=\"_blank\"\n    rel=\"noopener\"\n  >\n    <span\n    class=\"gatsby-resp-image-background-image\"\n    style=\"padding-bottom: 43.99999999999999%; position: relative; bottom: 0; left: 0; background-image: url('data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABQAAAAJCAYAAAAywQxIAAAACXBIWXMAAA7DAAAOwwHHb6hkAAABqklEQVQoz1VRa0+kQBDkJ5tocvvL7vziJfdF18T4Sk7dW2AXUFfe7KwMDAyU3T0bkyNUCF3V1V0z3uJ6DcaPq9X/WP7D6cUtTn4tHX5e4vT3nXAL4s7+POLk/FrA9cXSeXhXz2vc+FskSuP10CPea0GiOvxNdrgPIzxuEtyR5un1Q+qsW6clHjYxHsIY98Q/vedIDh28aLNBEm3xliRQdQ15phmgtylL6M9W/u0wAnai+iySvtWoigJ1SaAvrJUebxvF8MMQzy8rNHslPcNoMY4TqqZBRuKirJDlOfreSN3aGXt1oFqB3owwA+mpcSCODCP4QYAgCKF1j4kMLRETNdV1g/fdDi1tMw5uCHMzaRQZ1s1edG6I6/FCihzFMZl1ImZCDClmKZsVGMhM6t8c8ElHwVunaSboul6GemHoDDk/R7HHLXgaGxZFiaqqJS5vxpr5aJhmOUrichrKOtZ7AcV9Wa2gKZYzs4KJtmFR27YUycKYAYZMGbw9R67oEge6LGOMaLjP830fgZxhgIguyJ2F21ApRaaFgGNlGUdMxURrfaxlcta8Nfd9AdchoHFkycniAAAAAElFTkSuQmCC'); background-size: cover; display: block;\"\n  ></span>\n  <img\n        class=\"gatsby-resp-image-image\"\n        alt=\"Spark vs Hadoop MapReduce\"\n        title=\"Spark vs Hadoop MapReduce\"\n        src=\"/static/37ec11f6ba8affac2a325c88ef6262f4/0a47e/spark3.png\"\n        srcset=\"/static/37ec11f6ba8affac2a325c88ef6262f4/8a4e8/spark3.png 150w,\n/static/37ec11f6ba8affac2a325c88ef6262f4/5a46d/spark3.png 300w,\n/static/37ec11f6ba8affac2a325c88ef6262f4/0a47e/spark3.png 600w,\n/static/37ec11f6ba8affac2a325c88ef6262f4/1cfc2/spark3.png 900w,\n/static/37ec11f6ba8affac2a325c88ef6262f4/c1b63/spark3.png 1200w,\n/static/37ec11f6ba8affac2a325c88ef6262f4/062c8/spark3.png 1386w\"\n        sizes=\"(max-width: 600px) 100vw, 600px\"\n        style=\"width:100%;height:100%;margin:0;vertical-align:middle;position:absolute;top:0;left:0;\"\n        loading=\"lazy\"\n      />\n  </a>\n    </span></p>\n<h2 id=\"2-spark-architecture-and-components\" style=\"position:relative;\"><a 
href=\"#2-spark-architecture-and-components\" aria-label=\"2 spark architecture and components permalink\" class=\"anchor before\"><svg aria-hidden=\"true\" focusable=\"false\" height=\"16\" version=\"1.1\" viewBox=\"0 0 16 16\" width=\"16\"><path fill-rule=\"evenodd\" d=\"M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z\"></path></svg></a>2. Spark Architecture and Components</h2>\n<p><span\n      class=\"gatsby-resp-image-wrapper\"\n      style=\"position: relative; display: block; margin-left: auto; margin-right: auto; max-width: 600px; \"\n    >\n      <a\n    class=\"gatsby-resp-image-link\"\n    href=\"/static/80a81700d5d47f6b1d6649fa1ee102c1/6a6e9/spark4.png\"\n    style=\"display: block\"\n    target=\"_blank\"\n    rel=\"noopener\"\n  >\n    <span\n    class=\"gatsby-resp-image-background-image\"\n    style=\"padding-bottom: 46%; position: relative; bottom: 0; left: 0; background-image: url('data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABQAAAAJCAYAAAAywQxIAAAACXBIWXMAAA7DAAAOwwHHb6hkAAAB3UlEQVQoz32SS0/bQBSF/f/X3XXRRRdVRdmgViHQqoWUEojAJSgPYvKwHcfPie2x82pKvl4jqsKmlo7unHPP3LmeO0aWZpS6RGf6EUVe8LB9gN87dk94vv4fKp+BxBef8Mfkk7574N+G57a/2u6lZrjFipuxQ7PTpzsNSFa/mOkFV9aYVs/CHNr4ekmQL2i2O1x27rjoDnBTjVpvaQ8ntG67XPfusLMSIy7XuNEcO0yYJhm5FMyWG2ZpgSN6KMW1bKx0RzyeeCqvKpdo0dxI4fgRThDLoVLw2I448lKOK7iKQycRKOqTiNpwxkHf5t1lm/etW2rWlE8Dl7odUxffkRNzNFWCVDDnWDTjYBTy3fb5agfsj2NO3Ygfgr17n9ftEbXuiMPOPR/6Dq9Mi/2ezcnQ5cxTvB340kRC00s4Ff6m72FUCdOLuZopTrz5Y7zxFY2ZdCynmkEqfM55VPBROj8LNdfCzSinLn90Eea045zrWFOTvJEu1ihBUq7IJKZyf0FWEMt95MJXZUmRphRKsZX1MpdnVvH5nLXWLLIcLbmKr7IUo+PHmJMpLZnqXahwZZqNdpfGbY+JTFKP+tgXJ4zPv5Hfd9nYAwLznPBnk41jsZC81fiMdfaFtHPFH8nNmW5Kd4EbAAAAAElFTkSuQmCC'); background-size: cover; display: 
block;\"\n  ></span>\n  <img\n        class=\"gatsby-resp-image-image\"\n        alt=\"Spark components\"\n        title=\"Spark components\"\n        src=\"/static/80a81700d5d47f6b1d6649fa1ee102c1/0a47e/spark4.png\"\n        srcset=\"/static/80a81700d5d47f6b1d6649fa1ee102c1/8a4e8/spark4.png 150w,\n/static/80a81700d5d47f6b1d6649fa1ee102c1/5a46d/spark4.png 300w,\n/static/80a81700d5d47f6b1d6649fa1ee102c1/0a47e/spark4.png 600w,\n/static/80a81700d5d47f6b1d6649fa1ee102c1/6a6e9/spark4.png 826w\"\n        sizes=\"(max-width: 600px) 100vw, 600px\"\n        style=\"width:100%;height:100%;margin:0;vertical-align:middle;position:absolute;top:0;left:0;\"\n        loading=\"lazy\"\n      />\n  </a>\n    </span></p>\n<ul>\n<li><strong>Spark Core</strong> - The heart of Apache Spark is the Spark Core. It provides the distributed task dispatching, scheduling, basic input and output operations, and the RDD abstraction and APIs to manipulate it. It interacts with its scheduler to schedule tasks and it interacts with a cluster manager to send tasks to machines to be executed. The few cluster managers (Apache Mesos, Hadoop YARN, and recently Kubernetes), manage the underlying data that we want to analyze. </li>\n<li><strong>Spark SQL</strong> – A new component which replaces the older Shark (SQL on Spark) project, this package provides better integration with Spark Core, it allows querying data through SQL and HiveQL and supports many data sources from Hive tables, Parquet and JSON. Spark SQL also allows developers to intermix SQL queries with the code for data manipulations with RDDs in Python, Java, and Scala. It also provides fast SQL connectivity to BI tools like Tableau or QlikView.</li>\n<li><strong>Spark Streaming</strong> – based on micro-batching, this component enables processing of real-time streaming data. It uses DStreams, which are series of RDDs, to process real-time data. 
The Spark Streaming API is very similar to the Spark Core RDD APIs, making it easy for developers to reuse and adapt code for batch to interactive or real-time applications. </li>\n<li><strong>MLlib</strong> – provides a library of machine learning algorithms including classification, regression, clustering, and collaborative filtering, as well as model evaluation and data import. </li>\n<li><strong>GraphFrames</strong> – which provides dataframe-based graphs. It aims to provide both the functionality of GraphX (which is now deprecated) and extended functionality taking advantage of spark data frames. This extended functionality includes motif finding, dataframe-based serialization and highly expressive graph queries.</li>\n</ul>\n<p><span\n      class=\"gatsby-resp-image-wrapper\"\n      style=\"position: relative; display: block; margin-left: auto; margin-right: auto; max-width: 600px; \"\n    >\n      <a\n    class=\"gatsby-resp-image-link\"\n    href=\"/static/856a81918405486bb557a7f0ba835653/6295b/spark5.png\"\n    style=\"display: block\"\n    target=\"_blank\"\n    rel=\"noopener\"\n  >\n    <span\n    class=\"gatsby-resp-image-background-image\"\n    style=\"padding-bottom: 55.99999999999999%; position: relative; bottom: 0; left: 0; background-image: 
url('data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABQAAAALCAYAAAB/Ca1DAAAACXBIWXMAAA7DAAAOwwHHb6hkAAABu0lEQVQoz4VS227TUBDM//8JPCAk4AEQoFYUCoIAagltQ0pd5+LYPrZjx/bxdTq7LlFaVWBp5PXu2ZnZ9RkFfoAkTmBCs4MtLdABfdv/E90DGNVVjaZqdpDvtmn/S4aOuP8wNRIn6mYP2EP/gFNpLCj+/trHwdUaX5cG45XBzGQYDWr3hJizbJCmqm7puFPUjOW7Ybzc5HgxXeDYDXG6TvDB9TEmsTqUw9Mw1cJvk+IsSHAZZSrkZSV+eDFOfallei6nUJRb5iMsSOwmW0zNBj95Rglt1arao4mD53w/OXPx9o+no4nIY+afnrt4OVtq3Ym3CLeljvx5Eei4EouwEspo42VE9QwTf6M4ngcoKTSh6pvLFQ6cNd5deSRdwc+srqZteoWsIC1rWK5DCXPb4AsJz2/HFuK/hCdUFfevSPSaxOLwOsmRkeCjGxDDDqVfVqKEdd3RdsSkwXfu5Yj2v3EMGdllszRKTSCx7C8uKjy7mOPQGUY9dDyt7/6yvNvba1FyBVs7jCAuizto9NwqLXGyjuGlBeMCM/7EX8FmcLi7OnuQpkavS38HkpO6oUvZq6zm0zzEkStTRbgBEe9FFE3y3sUAAAAASUVORK5CYII='); background-size: cover; display: block;\"\n  ></span>\n  <img\n        class=\"gatsby-resp-image-image\"\n        alt=\"Spark Architecture\"\n        title=\"Spark Architecture\"\n        src=\"/static/856a81918405486bb557a7f0ba835653/0a47e/spark5.png\"\n        srcset=\"/static/856a81918405486bb557a7f0ba835653/8a4e8/spark5.png 150w,\n/static/856a81918405486bb557a7f0ba835653/5a46d/spark5.png 300w,\n/static/856a81918405486bb557a7f0ba835653/0a47e/spark5.png 600w,\n/static/856a81918405486bb557a7f0ba835653/1cfc2/spark5.png 900w,\n/static/856a81918405486bb557a7f0ba835653/6295b/spark5.png 919w\"\n        sizes=\"(max-width: 600px) 100vw, 600px\"\n        style=\"width:100%;height:100%;margin:0;vertical-align:middle;position:absolute;top:0;left:0;\"\n        loading=\"lazy\"\n      />\n  </a>\n    </span></p>\n<p><span\n      class=\"gatsby-resp-image-wrapper\"\n      style=\"position: relative; display: block; margin-left: auto; margin-right: auto; max-width: 600px; \"\n    >\n      <a\n    class=\"gatsby-resp-image-link\"\n    href=\"/static/eeea97e303da3edd06b32319864ad7cf/9a1cf/spark6.png\"\n    style=\"display: block\"\n    target=\"_blank\"\n    rel=\"noopener\"\n  >\n    <span\n    class=\"gatsby-resp-image-background-image\"\n    
style=\"padding-bottom: 48.666666666666664%; position: relative; bottom: 0; left: 0; background-image: url('data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABQAAAAKCAYAAAC0VX7mAAAACXBIWXMAAA7DAAAOwwHHb6hkAAABaUlEQVQoz6VS2U4CQRDk/7/F+K6JR4yICIpngKAcywLLzuzJ3lfZPcsRH4xGHyoz1Z2p6aqZhhQSwhTIswIogaqo/oVGmZcoCOUfBVBWapAaJLgnFfaN34sBKTkLkgxBnKm9EhxJH01N4G4h4YTpF/Gf0JoLHPU1HA809Ay7FnwyHLQXAs25iamzUREkaYGcVgbfzJxX5llW73mq+5WF57WDa81Eh/ZK8JUKPSJtXWJk+QjJggxizJwAmhvADhP4cQqxreleCI+4DGN0lhb6wsMNTdrdCb6QYHcp0dIFBsJVh8e2j9OPJc7GK8zdDUURU8/BCdWuZmusNxEMP1RDvJkuRWaShn0QHFoeHlY2JnZAlkpESY6IbEZpvcZqX/OQesx9ssxn3skVD7MXZMsD6VLTUhb5C3FGGeelMjxA8W09JPHHreCtzo9q7R7FxuXUwAWBBTl4niDJvgdfyJPyz+D8zicGhvRbPgHy6//asCSlzwAAAABJRU5ErkJggg=='); background-size: cover; display: block;\"\n  ></span>\n  <img\n        class=\"gatsby-resp-image-image\"\n        alt=\"Job terms\"\n        title=\"Job terms\"\n        src=\"/static/eeea97e303da3edd06b32319864ad7cf/0a47e/spark6.png\"\n        srcset=\"/static/eeea97e303da3edd06b32319864ad7cf/8a4e8/spark6.png 150w,\n/static/eeea97e303da3edd06b32319864ad7cf/5a46d/spark6.png 300w,\n/static/eeea97e303da3edd06b32319864ad7cf/0a47e/spark6.png 600w,\n/static/eeea97e303da3edd06b32319864ad7cf/1cfc2/spark6.png 900w,\n/static/eeea97e303da3edd06b32319864ad7cf/9a1cf/spark6.png 924w\"\n        sizes=\"(max-width: 600px) 100vw, 600px\"\n        style=\"width:100%;height:100%;margin:0;vertical-align:middle;position:absolute;top:0;left:0;\"\n        loading=\"lazy\"\n      />\n  </a>\n    </span></p>\n<h2 id=\"spark-installation\" style=\"position:relative;\"><a href=\"#spark-installation\" aria-label=\"spark installation permalink\" class=\"anchor before\"><svg aria-hidden=\"true\" focusable=\"false\" height=\"16\" version=\"1.1\" viewBox=\"0 0 16 16\" width=\"16\"><path fill-rule=\"evenodd\" d=\"M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 
2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z\"></path></svg></a>Spark Installation</h2>\n<ul>\n<li>Java SE Development Kit 8u161 - JDK v1.8.0</li>\n<li>gradle, maven</li>\n<li>scala, swift, kotlin</li>\n<li>artifacts</li>\n</ul>\n<h3 id=\"scala-vs-kotlin\" style=\"position:relative;\"><a href=\"#scala-vs-kotlin\" aria-label=\"scala vs kotlin permalink\" class=\"anchor before\"><svg aria-hidden=\"true\" focusable=\"false\" height=\"16\" version=\"1.1\" viewBox=\"0 0 16 16\" width=\"16\"><path fill-rule=\"evenodd\" d=\"M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z\"></path></svg></a>Scala vs Kotlin:</h3>\n<blockquote>\n<p>Scala and Kotlin are in quite a tug of war. Scala has the edge over Kotlin in some ways, but Kotlin is just as formidable in others. 
The main differences — where the two languages set themselves apart — is that Kotlin is more like a better version of Java, while Scala is an entirely different kind of Java, so to speak.</p>\n</blockquote>\n<h2 id=\"rdd\" style=\"position:relative;\"><a href=\"#rdd\" aria-label=\"rdd permalink\" class=\"anchor before\"><svg aria-hidden=\"true\" focusable=\"false\" height=\"16\" version=\"1.1\" viewBox=\"0 0 16 16\" width=\"16\"><path fill-rule=\"evenodd\" d=\"M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z\"></path></svg></a>RDD</h2>\n<blockquote>\n<p>RDDs (Resilient Distributed Datasets) are fault-tolerant, parallel data structures that let users explicitly persist intermediate results in memory, control their partitioning to optimize data placement, and manipulate them using a rich set of operators.</p>\n</blockquote>\n<p><span\n      class=\"gatsby-resp-image-wrapper\"\n      style=\"position: relative; display: block; margin-left: auto; margin-right: auto; max-width: 600px; \"\n    >\n      <a\n    class=\"gatsby-resp-image-link\"\n    href=\"/static/321dbf4a579877e63572ad982b5b411b/58bb7/rdd-features.png\"\n    style=\"display: block\"\n    target=\"_blank\"\n    rel=\"noopener\"\n  >\n    <span\n    class=\"gatsby-resp-image-background-image\"\n    style=\"padding-bottom: 70%; position: relative; bottom: 0; left: 0; background-image: 
url('data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABQAAAAOCAYAAAAvxDzwAAAACXBIWXMAAA7DAAAOwwHHb6hkAAACmklEQVQ4y22UW08TURDH+wF90C/hu34DniTRmJiQ+GIgxhgjKD7ITYoJBCiXIpRLoWsv23ZL6W13u7vddrvt7s9ztgWLMslkTubM/M9cT4whhIHkEKQchoJh4A/RWzqWaaA3m5i6Qb3ewGpbE3YjlhhSJ2Vs8uLO4JaCgGPVYOGwSqluC4dg5BjCQ36SY9z6h2MWdFG2+XHeomb2+HnZYksxiKcNOo5LtVLFsZz7PhMYsX6vT8d2cOwOfbdDodrmTbzMh0SNd5sa+ZoAF2Cpoomaz3NduUZRFGFviwc6dDsurpCOwHDFOeb1PGzLjgC7jkOl5fF6vcbMhsZ8shU9nFWrkZRgzXqTYkGj5/YjEAkqA5K1jQCZKFlE3Sxq6ZyDgks2W2BjPR5FtrqyKtK9odUyUbQzdOeGhyg26A9E2B1EoPh6kmFuCrQZ6F+RTJ5xcXbOXmKf3Z0EpbyGZlyxeDTD8uksNb3MwBvSdbt4XW/UFN/zMQ2dttWjW08QKs8I1WnoZTlJZURqLleXCrnjLSr5AmVXYzOzyEpqjnKjEJXKNEwsyxp3OfjbpUC23k5TVde4VFNi9kxOfp2wvLTCcfIA12rTziS4yG1T0tXRCE10W85zLBpsAeT3/UjZNHWWdr+wnVpj9zQe2RvNRiQrC1MoLx+jzT0XJRkQDAJkhvfnUM6quNB1Hafd4SxzyLfNObLlNJ83Zhnk9vAPP9JTj8i+fcrvV09Iv3iEUS2hG22ajQZDsVW32xNtyl3aYz7J7PN9Z556rYi/O4u39x7v8BNWWaH4dRr9cm9co7HPGIwo5X/3MvhnigpJ7J05XC19T//fut5GOPoc7isDsbNSylTapkX9pkpLNMj3+tE+h4Mg+kAY+4Xjj0HKP9ngG92kjwmlAAAAAElFTkSuQmCC'); background-size: cover; display: block;\"\n  ></span>\n  <img\n        class=\"gatsby-resp-image-image\"\n        alt=\"Rdd Features\"\n        title=\"Rdd Features\"\n        src=\"/static/321dbf4a579877e63572ad982b5b411b/0a47e/rdd-features.png\"\n        srcset=\"/static/321dbf4a579877e63572ad982b5b411b/8a4e8/rdd-features.png 150w,\n/static/321dbf4a579877e63572ad982b5b411b/5a46d/rdd-features.png 300w,\n/static/321dbf4a579877e63572ad982b5b411b/0a47e/rdd-features.png 600w,\n/static/321dbf4a579877e63572ad982b5b411b/1cfc2/rdd-features.png 900w,\n/static/321dbf4a579877e63572ad982b5b411b/58bb7/rdd-features.png 985w\"\n        sizes=\"(max-width: 600px) 100vw, 600px\"\n        style=\"width:100%;height:100%;margin:0;vertical-align:middle;position:absolute;top:0;left:0;\"\n        loading=\"lazy\"\n      />\n  </a>\n    </span></p>\n<p><span\n      class=\"gatsby-resp-image-wrapper\"\n      style=\"position: relative; display: block; margin-left: auto; margin-right: 
auto; max-width: 600px; \"\n    >\n      <a\n    class=\"gatsby-resp-image-link\"\n    href=\"/static/f38e45df01c1e23d51659e92c82a4c85/02cd5/spark-stream.png\"\n    style=\"display: block\"\n    target=\"_blank\"\n    rel=\"noopener\"\n  >\n    <span\n    class=\"gatsby-resp-image-background-image\"\n    style=\"padding-bottom: 40%; position: relative; bottom: 0; left: 0; background-image: url('data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABQAAAAICAYAAAD5nd/tAAAACXBIWXMAAA7DAAAOwwHHb6hkAAABsElEQVQoz02S624aMRCFef/n6Av0R9OiSJSQ0MASSEtCIpImahMuhb2w7OK92Lv+OmtQVUtHY43HZ84cu1Vog6kqmtVEbY4wlcWYWlA5aGOO+1Pe1ZzOteOoabhaWhekacB8fkcUrahM4chtmWPzPXUmyBOs5G2psFkMJwHWWGzNcS+ppkGrkCI//M3Au2C5fpZuB3JVCpFcNPpYXZeYx8/UaSQXRZnakSYHVGlZbnZ4k3uCfU6Si8JSZ7yv5vSu2jy/fieOffxNiC1ShoMB/f41558+8nD+QZTljj8JVoRBxDbOeHhZ0r+542URsFOalqkKZo9Dur0zbqeXBOEaleaUh4iR59HtdPCGI36+vrGPEwqVOUKVZqyDhJsfc4aTGfdP78QNYaOwGXV82+fX20z8DNn+CUBGn4wnXF70GF4P6HW7tL+0uez1nLehH7HYxFx5U/qjKd/GM3wZu6VF4WL1ROfrmSgdyWspcRjnYVUoal1KlFErTVUWZPtQmmWuJss12yB28MOEvBAPm2cvcyUm78hUQi3fgxpnvi0O/1DnqYtoeX35Og0a0v9XLV/nLxEkXDAchzAGAAAAAElFTkSuQmCC'); background-size: cover; display: block;\"\n  ></span>\n  <img\n        class=\"gatsby-resp-image-image\"\n        alt=\"Spark Stream\"\n        title=\"Spark Stream\"\n        src=\"/static/f38e45df01c1e23d51659e92c82a4c85/0a47e/spark-stream.png\"\n        srcset=\"/static/f38e45df01c1e23d51659e92c82a4c85/8a4e8/spark-stream.png 150w,\n/static/f38e45df01c1e23d51659e92c82a4c85/5a46d/spark-stream.png 300w,\n/static/f38e45df01c1e23d51659e92c82a4c85/0a47e/spark-stream.png 600w,\n/static/f38e45df01c1e23d51659e92c82a4c85/02cd5/spark-stream.png 821w\"\n        sizes=\"(max-width: 600px) 100vw, 600px\"\n        style=\"width:100%;height:100%;margin:0;vertical-align:middle;position:absolute;top:0;left:0;\"\n        loading=\"lazy\"\n      />\n  </a>\n    </span></p>\n<p><span\n      class=\"gatsby-resp-image-wrapper\"\n      style=\"position: relative; display: block; margin-left: auto; 
margin-right: auto; max-width: 600px; \"\n    >\n      <a\n    class=\"gatsby-resp-image-link\"\n    href=\"/static/ca3b7b3f89c0857e5d74489e09d74648/e9c9b/micro-batches.png\"\n    style=\"display: block\"\n    target=\"_blank\"\n    rel=\"noopener\"\n  >\n    <span\n    class=\"gatsby-resp-image-background-image\"\n    style=\"padding-bottom: 68.66666666666667%; position: relative; bottom: 0; left: 0; background-image: url('data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABQAAAAOCAYAAAAvxDzwAAAACXBIWXMAAA7DAAAOwwHHb6hkAAACX0lEQVQ4y4WT228SQRTG9y/3waCxmqqNDehLow/G+GJiiqmtTRsa28S2SlvYUmDZXRZY7pclsIW98PnNcLFFopt8zMyy5zdzzvlGQQhMgslC83U2m0U+n4eu61BVFblcjmsNvhfKb0J+Ey7FCCl3Qfc0e4JwQuH+w3d/5lP4FAooc0DAl7d+gKEXwKXGnE8Y6HEuAL4fItV08MWo45NmY1uv4bzehTv2p9wZVJnwRwC6ozE6S3LGHjyCa4NbfMiW8TypS72YjevJArauLZT67gKqDD0f7RUwoS6BTXeEj7kK1n8VELsyEb0jsd64MDg30ByOmDNPKANnwX0WS4opdrmR43m4aDiIM73XKRObl8ZfEmBx2s+F6h0gYa2hC7Wgw6jWcFO0YHd7PLmHA6uJfauB91kbmykLsXQJMbWEaNpajAIc44at4XgKdFjwHk9zmkrhLK3inDapdDrosCbb3HnXauHtjys8iCfw5OAMj3ZP8Hj/FA93jrGWSCJKmKir2XOh9JhavlxBStNwY5rIUNcGxfXxZRp7Rg1HdgfvfmYQ+XaKNQIjBD49Ssox8vWEwCKBBRSdIRSRrlaxUahWkS+VZLoZswjDtnniNA6LdRyWWv9N+Q2hvVum3JYpB7IZzrwpYs4yCDOo7T69V2OAiVcrmiI6LRywY9aldZTBP2zT5o49ZhDXq3jGoOiSbeYd3lItOGyg7LK4DQK6CihgQRhiMPLlzdi4mBmacDG+pIRH2+5YwqSx51fPZ5ouwQIuzD5iGcJwds8n028sFv17pYW9YgOJcgtad7D4f371fgOSsP7xgf9hSQAAAABJRU5ErkJggg=='); background-size: cover; display: block;\"\n  ></span>\n  <img\n        class=\"gatsby-resp-image-image\"\n        alt=\"Micro Batches\"\n        title=\"Micro Batches\"\n        src=\"/static/ca3b7b3f89c0857e5d74489e09d74648/0a47e/micro-batches.png\"\n        srcset=\"/static/ca3b7b3f89c0857e5d74489e09d74648/8a4e8/micro-batches.png 150w,\n/static/ca3b7b3f89c0857e5d74489e09d74648/5a46d/micro-batches.png 300w,\n/static/ca3b7b3f89c0857e5d74489e09d74648/0a47e/micro-batches.png 600w,\n/static/ca3b7b3f89c0857e5d74489e09d74648/e9c9b/micro-batches.png 627w\"\n        sizes=\"(max-width: 600px) 100vw, 600px\"\n        
style=\"width:100%;height:100%;margin:0;vertical-align:middle;position:absolute;top:0;left:0;\"\n        loading=\"lazy\"\n      />\n  </a>\n    </span></p>\n<h2 id=\"spark-actions\" style=\"position:relative;\"><a href=\"#spark-actions\" aria-label=\"spark actions permalink\" class=\"anchor before\"><svg aria-hidden=\"true\" focusable=\"false\" height=\"16\" version=\"1.1\" viewBox=\"0 0 16 16\" width=\"16\"><path fill-rule=\"evenodd\" d=\"M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z\"></path></svg></a>Spark Actions</h2>\n<h2 id=\"spark-etl-ecl---cleansing\" style=\"position:relative;\"><a href=\"#spark-etl-ecl---cleansing\" aria-label=\"spark etl ecl   cleansing permalink\" class=\"anchor before\"><svg aria-hidden=\"true\" focusable=\"false\" height=\"16\" version=\"1.1\" viewBox=\"0 0 16 16\" width=\"16\"><path fill-rule=\"evenodd\" d=\"M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z\"></path></svg></a>Spark ETL (ECL - Cleansing)</h2>\n<p><span\n      class=\"gatsby-resp-image-wrapper\"\n      style=\"position: relative; display: block; margin-left: auto; margin-right: auto; max-width: 600px; \"\n    >\n      <a\n    class=\"gatsby-resp-image-link\"\n    href=\"/static/12f09616d8ba7edfa90dda8229bb5621/d0143/etl1.png\"\n    style=\"display: block\"\n    target=\"_blank\"\n    rel=\"noopener\"\n  >\n    <span\n    class=\"gatsby-resp-image-background-image\"\n    style=\"padding-bottom: 
21.333333333333336%; position: relative; bottom: 0; left: 0; background-image: url('data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABQAAAAECAYAAACOXx+WAAAACXBIWXMAAA7DAAAOwwHHb6hkAAAA4ElEQVQY0z2M226CUBBF+f9f8KX9Bx/8gyq0CL2kCUGQUkrl2kY44wHN6gFNH1b27FmZsUROiFKcROasZaCUcWaalbruxaS6Ib2ad1cn5O0PXdfDeMZqvzOKF4fUXfO7e2eVlCz8iMU2ZJm2tEnIp/vAl7eh2Yc0cUDu21SvTzTJjtrcZNu1+fGIVAesKs9IPZtka1MEb6yinLvniHvDMi4ozZMP41PPoYhDiihg725IfYcqjcnNzeQz06UusbQeGfTAOOVpoNNnjsNlZpq1ccMNbfy1j/99ysOxR4mG84U/6icppoLw+WcAAAAASUVORK5CYII='); background-size: cover; display: block;\"\n  ></span>\n  <img\n        class=\"gatsby-resp-image-image\"\n        alt=\"Spark ETL\"\n        title=\"Spark ETL\"\n        src=\"/static/12f09616d8ba7edfa90dda8229bb5621/0a47e/etl1.png\"\n        srcset=\"/static/12f09616d8ba7edfa90dda8229bb5621/8a4e8/etl1.png 150w,\n/static/12f09616d8ba7edfa90dda8229bb5621/5a46d/etl1.png 300w,\n/static/12f09616d8ba7edfa90dda8229bb5621/0a47e/etl1.png 600w,\n/static/12f09616d8ba7edfa90dda8229bb5621/1cfc2/etl1.png 900w,\n/static/12f09616d8ba7edfa90dda8229bb5621/d0143/etl1.png 1025w\"\n        sizes=\"(max-width: 600px) 100vw, 600px\"\n        style=\"width:100%;height:100%;margin:0;vertical-align:middle;position:absolute;top:0;left:0;\"\n        loading=\"lazy\"\n      />\n  </a>\n    </span></p>\n<p>For different data sources, the solution is to use a <code class=\"language-text\">Data warehouse</code> to store information from different sources in a uniform structure using <code class=\"language-text\">ETL</code>.</p>\n<p>In a production environment, it will be extremely rare that you will be working on a local filesystem, and chances are high that you will be working on distributed file systems such as HDFS and Amazon S3.</p>\n<p>Hadoop Distributed File System (HDFS) is a distributed, scalable, and portable filesystem written in Java for the Hadoop framework. 
Spark allows you to read data from HDFS in much the same way that you would read from a typical filesystem, the only difference being that you point to the NameNode and the HDFS port.</p>\n<p>S3 stands for Simple Storage Service, an online storage service provided by Amazon Web Services. The core principles of S3 include scalability, high-availability, low-latency, and low-pricing. S3 provides amazing speed when your cluster is inside Amazon EC2, but the performance can be a nightmare if you are accessing large amounts of data over the public Internet.</p>\n<blockquote>\n<p>NoSQL</p>\n</blockquote>\n<div class=\"gatsby-highlight\" data-language=\"text\"><pre style=\"counter-reset: linenumber NaN\" class=\"language-text line-numbers\"><code class=\"language-text\">The most popular NoSQL databases include:\n* Cassandra\n* HBase\n* MongoDB\n* Solr\n* Couchbase</code><span aria-hidden=\"true\" class=\"line-numbers-rows\" style=\"white-space: normal; width: auto; left: 0;\"><span></span><span></span><span></span><span></span><span></span><span></span></span></pre></div>\n<p><code class=\"language-text\">Couchbase</code> works with <code class=\"language-text\">Spark</code>, <code class=\"language-text\">Kafka</code>, <code class=\"language-text\">Hadoop</code>, <code class=\"language-text\">Elasticsearch</code>, <code class=\"language-text\">Solr</code>, <code class=\"language-text\">JDBC</code>.</p>\n<h2 id=\"spark-core-part-2\" style=\"position:relative;\"><a href=\"#spark-core-part-2\" aria-label=\"spark core part 2 permalink\" class=\"anchor before\"><svg aria-hidden=\"true\" focusable=\"false\" height=\"16\" version=\"1.1\" viewBox=\"0 0 16 16\" width=\"16\"><path fill-rule=\"evenodd\" d=\"M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 
11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z\"></path></svg></a>Spark Core Part 2</h2>\n<blockquote>\n<p>DataFrame is a distributed collection of data organized into named columns. It is conceptually equivalent to a table in a relational database or a data frame in R/Python, but with richer optimizations under the hood. </p>\n</blockquote>\n<blockquote>\n<p>Dataset is a strongly typed collection of domain-specific objects that can be transformed in parallel using functional or relational operations. Each Dataset also has an untyped view called a DataFrame, which is a Dataset of Row. </p>\n</blockquote>\n<h3 id=\"rdd-dataframe-dataset\" style=\"position:relative;\"><a href=\"#rdd-dataframe-dataset\" aria-label=\"rdd dataframe dataset permalink\" class=\"anchor before\"><svg aria-hidden=\"true\" focusable=\"false\" height=\"16\" version=\"1.1\" viewBox=\"0 0 16 16\" width=\"16\"><path fill-rule=\"evenodd\" d=\"M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z\"></path></svg></a>RDD, DataFrame, DataSet</h3>\n<p><span\n      class=\"gatsby-resp-image-wrapper\"\n      style=\"position: relative; display: block; margin-left: auto; margin-right: auto; max-width: 600px; \"\n    >\n      <a\n    class=\"gatsby-resp-image-link\"\n    href=\"/static/99212ebb4ae105119288d0a14aac591b/cda19/spark-api-conceptions.png\"\n    style=\"display: block\"\n    target=\"_blank\"\n    rel=\"noopener\"\n  >\n    <span\n    class=\"gatsby-resp-image-background-image\"\n    style=\"padding-bottom: 66%; position: relative; bottom: 0; left: 0; background-image: 
url('data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABQAAAANCAYAAACpUE5eAAAACXBIWXMAAA7DAAAOwwHHb6hkAAACkUlEQVQ4y32S61NSURTF+T/7ZE91anpQptPoh8psatRS0zJBknwUpVkyIhKCaQ/FvIWliIqhKMhLSxOBe3+d68Uga9oza+4595yz9tprb10iniASjhCLRllfWxfrMPFYTOw1JOJxwuthwuJOMrmJIoOcVVByQOwLoZMzMtl09i+o/w/OMnsZFLEmK6AoeciKuKPhIIHucIZC7CvIxV5aYfMnrCUyrCUzJLZlUqmsdqjkVeuUAvmHoUZ8c5e3c3FMg14qG3upbhsSsHHrkZPn40Hez0bZ/J7SeP9HqIY/tEWnO4jRGaJtcI7ajtfc7hih1uyiuddDqz1Asy3Ak7EQy5Gd/Tc6RfhAVmXXypRzZKvRHVoGF9G3uDl+o49y4wQPHStY3oXpcC5z+YGbU7dtFNcOcb7RQav1K5FkSnioksmawYWds06E0Ld5OHnLStHVp5y5N8ZFo4TJscT1ro8cqerZT1QiCI/dHKCqw4N9OpxTqJLlCFWDVbMNw0sU1w0LFVYuGacpN8+gN0zT8NJHpVnihEhUWj/MmUaX+DooM0wIe74VlCzn1WXFiHSOBDjb6uGSYYqydokyk4ReKLw/uMD1Hi9F1f0cvfaUEzUvKLnjpOqxxLPxlb8JDzycmo9xxfyZC8ZPGgwSFe1TmIb9NPV7Od3gpLTBzbnWSSoeSTQN+PEGkn8S/p693Pg5vTHq+75Q0yNR1z9P18gijx0+zPZZutzL3H3lp/aFTzRvAdfnyL6YfFOyebJ0Kk1wKUBwNcJrSztWyxO6x8OY7D7qut/S8mqGntEgna4AlrEg04sJrTKZf3ioEu7uYbfZeONy827UxcZGXMzkNh9mIzgmA7ikNab8CXwrW/zYSWvl5Kr7BUb6lhX0DLUOAAAAAElFTkSuQmCC'); background-size: cover; display: block;\"\n  ></span>\n  <img\n        class=\"gatsby-resp-image-image\"\n        alt=\"Spark API conceptions\"\n        title=\"Spark API conceptions\"\n        src=\"/static/99212ebb4ae105119288d0a14aac591b/0a47e/spark-api-conceptions.png\"\n        srcset=\"/static/99212ebb4ae105119288d0a14aac591b/8a4e8/spark-api-conceptions.png 150w,\n/static/99212ebb4ae105119288d0a14aac591b/5a46d/spark-api-conceptions.png 300w,\n/static/99212ebb4ae105119288d0a14aac591b/0a47e/spark-api-conceptions.png 600w,\n/static/99212ebb4ae105119288d0a14aac591b/cda19/spark-api-conceptions.png 785w\"\n        sizes=\"(max-width: 600px) 100vw, 600px\"\n        style=\"width:100%;height:100%;margin:0;vertical-align:middle;position:absolute;top:0;left:0;\"\n        loading=\"lazy\"\n      />\n  </a>\n    </span></p>\n<p><span\n      class=\"gatsby-resp-image-wrapper\"\n      style=\"position: relative; display: block; margin-left: auto; margin-right: auto; max-width: 
600px; \"\n    >\n      <a\n    class=\"gatsby-resp-image-link\"\n    href=\"/static/2a4e3307f415dda4d1626b0931b16777/0f7d5/spark-apis.png\"\n    style=\"display: block\"\n    target=\"_blank\"\n    rel=\"noopener\"\n  >\n    <span\n    class=\"gatsby-resp-image-background-image\"\n    style=\"padding-bottom: 55.333333333333336%; position: relative; bottom: 0; left: 0; background-image: url('data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABQAAAALCAYAAAB/Ca1DAAAACXBIWXMAAA7DAAAOwwHHb6hkAAAB40lEQVQoz2VSSY7cMAz0/x+RSzJzmCxAvpFTLkmmZzrd6cVLe5Ml2Zb3SlF2BwPEACGyRBbJkgNTaeiyhFHKn7aq6FfAOAELgBnrefffxsv/cfA91Xj384LHXYQPzyHe0z7tYzy9xnjYxXh8kTPCR8ZfDwme9gkeNuwz875s8T0viCuDb68H/IpShHWPq3U4FBovtwJHZXEsDS7G4XeusM9K/HmDSXzHxC6mRdA0LW5RhCLLMLgO8g1dB6srxg4jfcyL92uj/d3Y9R7rW4fGGuZqtNauK5u6wfF0QprlGIYJ07TAuR4Nk7t+9LGYZV4uWtv6H6aNRUm9S+pelAotB/KEl+vVX8i0PUmEUPGxmrbFOEqT2TdQzCn4eLau0Q8jataWjKW20obNLIKaJGma0jLknHIiQc+VxBa+4EyymdN07O5kauLijyQ0xrCJInHt84d+WAlDahjGMRzBmSQtCwUX67rBT+iEZCT5vGwrz/5epJAmgo3EAgFO57MnXYsmEnZc322EvSdq/XSDj0UWweReatZ48hZIsZCJjklyw8KXkiJNTVay9QcXwdWm80QywcTXXFskEXnEgjjN8eN5h3MYQfHVKlPjlhdIqKe2DZResSQrPC45irGckpMWJUrmqM3+Ar7eR7xLAlxCAAAAAElFTkSuQmCC'); background-size: cover; display: block;\"\n  ></span>\n  <img\n        class=\"gatsby-resp-image-image\"\n        alt=\"Spark API comparison\"\n        title=\"Spark API comparison\"\n        src=\"/static/2a4e3307f415dda4d1626b0931b16777/0a47e/spark-apis.png\"\n        srcset=\"/static/2a4e3307f415dda4d1626b0931b16777/8a4e8/spark-apis.png 150w,\n/static/2a4e3307f415dda4d1626b0931b16777/5a46d/spark-apis.png 300w,\n/static/2a4e3307f415dda4d1626b0931b16777/0a47e/spark-apis.png 600w,\n/static/2a4e3307f415dda4d1626b0931b16777/1cfc2/spark-apis.png 900w,\n/static/2a4e3307f415dda4d1626b0931b16777/0f7d5/spark-apis.png 993w\"\n        sizes=\"(max-width: 600px) 100vw, 600px\"\n        style=\"width:100%;height:100%;margin:0;vertical-align:middle;position:absolute;top:0;left:0;\"\n        loading=\"lazy\"\n      />\n  </a>\n   
 </span></p>\n<p><span\n      class=\"gatsby-resp-image-wrapper\"\n      style=\"position: relative; display: block; margin-left: auto; margin-right: auto; max-width: 600px; \"\n    >\n      <a\n    class=\"gatsby-resp-image-link\"\n    href=\"/static/4b3338d160958afdc4b20c8112738907/6c745/spark-structured-api.png\"\n    style=\"display: block\"\n    target=\"_blank\"\n    rel=\"noopener\"\n  >\n    <span\n    class=\"gatsby-resp-image-background-image\"\n    style=\"padding-bottom: 50.66666666666667%; position: relative; bottom: 0; left: 0; background-image: url('data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABQAAAAKCAYAAAC0VX7mAAAACXBIWXMAAA7DAAAOwwHHb6hkAAACPklEQVQoz01SXU8TURTsPzVqEB94QtAHY3wiMYhpwApKCBhDVDQaXhoFgW6r2N122/28bbcfS1u79IN2lxZ5HmfbanyYnLlz5s49Z7MRvdXFj3IdarMDs3sJoxtMMeHWxZAYTUHe+/88+tc3qNeC34hI7jn2zQrixToOqh6+lJtjxEt1fK20kGgNkPR8SC2fNUDyPJjyiSZ5E37yawCdoRGlcwk7uEah56PQ9WEOriBTqwRXEJ0+yv4IoUfnFKLdh+X1IOg5Pffh0OP06f3rCQPTbKgkeicYQ+kO8Y0vZ3sjpL0B8uwlOFmqPYTS9iF7ffaGOOBECj0Z+jU+dsrJx4Hxsy7mTwwsJgzME0tyCa/zDjazReywPs04iKkOttUitoiNnINlarvaRHsz9TxUKlA5ZWS/1sHtEwt3ExZuHFtYV2xcOibK2Z8YlnR8zttQTBOeJqOZl1GyTeyqFq5rBTSMLK6rNvZyAjOpIgMDRA4bF3iinWFFb2BZr2NVc7Fj1PBSr2HTcBHlOUa+Y1SxxfpCq+FZ3sVG2Ce26X1ObYUZOf4ZkbjLlRM6Fo413JdMPEiXsUhEM0WsyQIxRWCVfIHakuJgQy1hnZ8jrI9lB/eo3087eMSVs+GE4cq3uPKdhD3GTLKAuZSAzdVE6giGdAhHmJiVBD7lBYaOgWpOwagqEJVt3JQKmE0KzH0vItNm4FHzAlGjjjWzMUHIibfiDB+IPeHiHesqtW2rjo/k72133Htl8hMZkzsx3g1X/gPkgKmWz2WElQAAAABJRU5ErkJggg=='); background-size: cover; display: block;\"\n  ></span>\n  <img\n        class=\"gatsby-resp-image-image\"\n        alt=\"Spark Structured API\"\n        title=\"Spark Structured API\"\n        src=\"/static/4b3338d160958afdc4b20c8112738907/0a47e/spark-structured-api.png\"\n        srcset=\"/static/4b3338d160958afdc4b20c8112738907/8a4e8/spark-structured-api.png 150w,\n/static/4b3338d160958afdc4b20c8112738907/5a46d/spark-structured-api.png 300w,\n/static/4b3338d160958afdc4b20c8112738907/0a47e/spark-structured-api.png 
600w,\n/static/4b3338d160958afdc4b20c8112738907/6c745/spark-structured-api.png 893w\"\n        sizes=\"(max-width: 600px) 100vw, 600px\"\n        style=\"width:100%;height:100%;margin:0;vertical-align:middle;position:absolute;top:0;left:0;\"\n        loading=\"lazy\"\n      />\n  </a>\n    </span></p>\n<h2 id=\"spark-on-kubernetes\" style=\"position:relative;\"><a href=\"#spark-on-kubernetes\" aria-label=\"spark on kubernetes permalink\" class=\"anchor before\"><svg aria-hidden=\"true\" focusable=\"false\" height=\"16\" version=\"1.1\" viewBox=\"0 0 16 16\" width=\"16\"><path fill-rule=\"evenodd\" d=\"M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z\"></path></svg></a>Spark on Kubernetes</h2>\n<p><span\n      class=\"gatsby-resp-image-wrapper\"\n      style=\"position: relative; display: block; margin-left: auto; margin-right: auto; max-width: 600px; \"\n    >\n      <a\n    class=\"gatsby-resp-image-link\"\n    href=\"/static/bf05a3be9315cd062960d284ca489efb/73b94/spark-on-kubernetes.png\"\n    style=\"display: block\"\n    target=\"_blank\"\n    rel=\"noopener\"\n  >\n    <span\n    class=\"gatsby-resp-image-background-image\"\n    style=\"padding-bottom: 50.66666666666667%; position: relative; bottom: 0; left: 0; background-image: 
url('data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABQAAAAKCAYAAAC0VX7mAAAACXBIWXMAAA7DAAAOwwHHb6hkAAABxElEQVQoz52SSY/TQBBG/f//BUIICXFA4swFJKQ5gGaSmSTOOl6yeG873u34UXYmkeFIy59adrtfVX1VWhRGBH5AEIRUZQ0X6Nruv6V1rRAu3VXt2y6f7qs/bkdn8jACXEYagJso5fvGHrRTKU3TcZRd3zvoB5fgnA3cpGo5nAuMMMYraurmcoX3wW+7SJu4MV90i6+rPRNXsfATfp9C3j+u+Djd8tP2iQXgqgTTeMXarrFdl6ioKKqGoGhwskqCNMRlg9Y2LXVZURYljfzQR/Hyim+Gww+B6WHCVAKFkuneMjmKHM9jGZ558hSW47LR51imgRXFaIWAPNfDlYMe2q8nJ+Ldw5QPvxbM/ZiJvG+8iP1aZ/cywbYtavHez0psz+dhrrM2LU7JGS3PC+myTyhdztJ8AD57CZ+ed3yemcz6LMTnma9YHhxezD2vgUIXa3TJ0pG7jwudrWFwjAWYZYVk6OJLGSqKhk7V0phcmpCKJ/lgQ0dZN6i8JBalYpGfFtiipWGylpJXq6UEiq4l99nFSglQUYnZt47dxuk2Evfv7XV88rrlmJZ3hXJ3KNk9nXBE/ZDnWf73sL5BuvHAX/4JMNIfSsj8FnU8i44AAAAASUVORK5CYII='); background-size: cover; display: block;\"\n  ></span>\n  <img\n        class=\"gatsby-resp-image-image\"\n        alt=\"Spark on Kubernetes\"\n        title=\"Spark on Kubernetes\"\n        src=\"/static/bf05a3be9315cd062960d284ca489efb/0a47e/spark-on-kubernetes.png\"\n        srcset=\"/static/bf05a3be9315cd062960d284ca489efb/8a4e8/spark-on-kubernetes.png 150w,\n/static/bf05a3be9315cd062960d284ca489efb/5a46d/spark-on-kubernetes.png 300w,\n/static/bf05a3be9315cd062960d284ca489efb/0a47e/spark-on-kubernetes.png 600w,\n/static/bf05a3be9315cd062960d284ca489efb/1cfc2/spark-on-kubernetes.png 900w,\n/static/bf05a3be9315cd062960d284ca489efb/73b94/spark-on-kubernetes.png 1004w\"\n        sizes=\"(max-width: 600px) 100vw, 600px\"\n        style=\"width:100%;height:100%;margin:0;vertical-align:middle;position:absolute;top:0;left:0;\"\n        loading=\"lazy\"\n      />\n  </a>\n    </span></p>\n<p><span\n      class=\"gatsby-resp-image-wrapper\"\n      style=\"position: relative; display: block; margin-left: auto; margin-right: auto; max-width: 600px; \"\n    >\n      <a\n    class=\"gatsby-resp-image-link\"\n    href=\"/static/13b12970459ea9c98ec73e74382e0073/350de/cluster-managers-comparison.png\"\n    style=\"display: block\"\n    
target=\"_blank\"\n    rel=\"noopener\"\n  >\n    <span\n    class=\"gatsby-resp-image-background-image\"\n    style=\"padding-bottom: 54%; position: relative; bottom: 0; left: 0; background-image: url('data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABQAAAALCAYAAAB/Ca1DAAAACXBIWXMAAA7DAAAOwwHHb6hkAAABcElEQVQoz41T2bLrIAzL/39n9gQKBLITHctteno79+FkqmEZ17JkU5jRwDuPcRgx9D2mMOmemNMMXPI7rz+jyPspm4x93bEtG045H9uBfdtxHuc7EBmKz/03NGGQRFFw8ZLf9YVXYBSSVciQL+QjKzEJFfsTjC16kdb1A4KftEJWus6r7pd50TMTJ7krywp1VcMYqza5h0Oc4hua0FkLawz6rlcYITDjqMHeOWE+NJDWBElCTCEgxYQ0JeQzv9WoZGYOPihjcAFRmkJJhPr1Csz08vq/JYq7KetL+6dft0dsDsF7+uzSgng3Tir/9I//Uclt06KqKkXbNCpT/bHPlbIYSC+buhZbOvQyXgSt6dpWMYhVJC+YpJMgyqZ8esN1lpVV6MeEMkYkJVEMUUGyO/4esYJVPOxDmxC81wq1s/h37laRWkmXW6nGymNgdXwUVjrOR8A8Tw9lNG4WMvL8Pf235LIstcrfJnp9WTwT9PIHAdNWv5UOS5QAAAAASUVORK5CYII='); background-size: cover; display: block;\"\n  ></span>\n  <img\n        class=\"gatsby-resp-image-image\"\n        alt=\"cluster-managers-comparison.png\"\n        title=\"cluster-managers-comparison.png\"\n        src=\"/static/13b12970459ea9c98ec73e74382e0073/0a47e/cluster-managers-comparison.png\"\n        srcset=\"/static/13b12970459ea9c98ec73e74382e0073/8a4e8/cluster-managers-comparison.png 150w,\n/static/13b12970459ea9c98ec73e74382e0073/5a46d/cluster-managers-comparison.png 300w,\n/static/13b12970459ea9c98ec73e74382e0073/0a47e/cluster-managers-comparison.png 600w,\n/static/13b12970459ea9c98ec73e74382e0073/1cfc2/cluster-managers-comparison.png 900w,\n/static/13b12970459ea9c98ec73e74382e0073/350de/cluster-managers-comparison.png 998w\"\n        sizes=\"(max-width: 600px) 100vw, 600px\"\n        style=\"width:100%;height:100%;margin:0;vertical-align:middle;position:absolute;top:0;left:0;\"\n        loading=\"lazy\"\n      />\n  </a>\n    </span></p>\n<h2 id=\"build-management-at-glance\" style=\"position:relative;\"><a href=\"#build-management-at-glance\" aria-label=\"build management at glance permalink\" class=\"anchor before\"><svg 
aria-hidden=\"true\" focusable=\"false\" height=\"16\" version=\"1.1\" viewBox=\"0 0 16 16\" width=\"16\"><path fill-rule=\"evenodd\" d=\"M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z\"></path></svg></a>Build management at Glance</h2>\n<p>(1)</p>\n<ul>\n<li>SBT (Simple Build Tool), Scala, DSL, </li>\n<li>Gradle (Groovy)</li>\n<li>Maven, JVM language Scala, XML</li>\n</ul>\n<p>(2)</p>\n<ul>\n<li>yaml, yml</li>\n</ul>\n<p>(3) Running in Standalone Mode</p>\n<ul>\n<li>Spark Cluster (Spark Master is running :7077)</li>\n<li>Spark Workers (web UI :8080)</li>\n<li>spark-submit to submit application</li>\n<li>Spark Driver -> Web UI\nSpark UI is available only while Driver is running.</li>\n</ul>\n<h2 id=\"verb\" style=\"position:relative;\"><a href=\"#verb\" aria-label=\"verb permalink\" class=\"anchor before\"><svg aria-hidden=\"true\" focusable=\"false\" height=\"16\" version=\"1.1\" viewBox=\"0 0 16 16\" width=\"16\"><path fill-rule=\"evenodd\" d=\"M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z\"></path></svg></a>Verb.</h2>\n<ul>\n<li>discretize - 离散化</li>\n<li>shuffling 洗牌</li>\n<li>coarse-grained 粗粒度</li>\n<li>coalesce - 合并</li>\n<li>Terminology 术语</li>\n<li>resilient - 弹性</li>\n<li>experimental - 试验</li>\n<li>coalesce - 合并</li>\n<li>Monolithic - 单片</li>\n<li>artifact 构件</li>\n</ul>","tableOfContents":"<ul>\n<li>\n<p><a href=\"#spark\">Spark</a></p>\n<ul>\n<li><a href=\"#1-spark-vs-hadoop-mapreduce\">1. 
Spark vs. Hadoop MapReduce</a></li>\n<li><a href=\"#2-spark-architecture-and-components\">2. Spark Architecture and Components</a></li>\n<li>\n<p><a href=\"#spark-installation\">Spark Installation</a></p>\n<ul>\n<li><a href=\"#scala-vs-kotlin\">Scala vs Kotlin:</a></li>\n</ul>\n</li>\n<li><a href=\"#rdd\">RDD</a></li>\n<li><a href=\"#spark-actions\">Spark Actions</a></li>\n<li><a href=\"#spark-etl-ecl---cleansing\">Spark ETL (ECL - Cleansing)</a></li>\n<li>\n<p><a href=\"#spark-core-part-2\">Spark Core Part 2</a></p>\n<ul>\n<li><a href=\"#rdd-dataframe-dataset\">RDD, DataFrame, DataSet</a></li>\n</ul>\n</li>\n<li><a href=\"#spark-on-kubernetes\">Spark on Kubernetes</a></li>\n<li><a href=\"#build-management-at-glance\">Build management at Glance</a></li>\n<li><a href=\"#verb\">Verb.</a></li>\n</ul>\n</li>\n</ul>"},"previous":{"fields":{"id":"07672f34-d70e-589c-ba8e-67e284bc1d8c","title":"5 Hive","slug":"/bigdata/5-hive/"},"excerpt":"What is Hive? is an open-source data warehouse system built on top of Hadoop for querying and analyzing large datasets is a NOT relational…"},"next":{"fields":{"id":"4dab467f-b6ea-531b-8c28-9bcef71863e4","title":"7 Docker","slug":"/bigdata/7-docker/"},"excerpt":"Docker, Jupyter, DLAB  1. Docker Amazon, Azure, GCP EC2 Lambda S3 ETL  2. DLAB Aure / AWS /GCP 3. Jupyter DLab Jenkins Puppet Chef Ansible…"}},"pageContext":{"id":"bf3a9766-31fd-5213-8dd9-7c38ebe7bb80","prevId":"07672f34-d70e-589c-ba8e-67e284bc1d8c","nextId":"4dab467f-b6ea-531b-8c28-9bcef71863e4"}},"staticQueryHashes":["1576573137","63159454"]}